{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "f1029e76-7e61-4296-801d-3a49637b5972",
   "metadata": {},
   "source": [
    "# Ragas Answer Relevance Evaluation\n",
    "\n",
    "This notebook evaluates model output with the Ragas `answer_relevancy` metric."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "64f96a0a-bb6e-4dbf-a37a-1f602429fb47",
   "metadata": {},
   "outputs": [],
   "source": [
    "# %pip (rather than !pip3) installs into the environment of the running kernel.\n",
    "%pip install ragas==0.1.13 langchain-openai==0.1.20 datasets==2.20.0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9272217d-bb77-4135-961d-c4a0121f4081",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Report the installed versions as seen from the running kernel's environment.\n",
    "%pip show datasets ragas langchain-openai"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7175e70a-abf6-4c49-9a04-4abec23c646f",
   "metadata": {},
   "outputs": [],
   "source": [
    "import ipython_secrets\n",
    "import os\n",
    "\n",
    "# Fetch the OpenAI key through ipython_secrets and export it to the process\n",
    "# environment, where the evaluation cell reads it back with os.getenv.\n",
    "OPENAI_API_KEY = os.environ['OPENAI_API_KEY'] = ipython_secrets.get_secret('OPENAI_API_KEY')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3650ce4e-c63a-4f96-968f-bffbfd030d3b",
   "metadata": {},
   "outputs": [],
   "source": [
    "from ragas.metrics import answer_relevancy\n",
    "from ragas import evaluate\n",
    "from datasets import Dataset \n",
    "from langchain_openai.chat_models import ChatOpenAI\n",
    "from langchain_openai.embeddings import OpenAIEmbeddings\n",
    "import os\n",
    "\n",
    "# Read the key back from the environment; os.getenv returns None when the variable is unset.\n",
    "openai_api_key = os.getenv(\"OPENAI_API_KEY\") \n",
    "\n",
    "# Sample records to score. Each record carries an 'input', an 'output' and a list of\n",
    "# 'contexts' strings; prepare_data_for_ragas maps them to question/answer/contexts.\n",
    "# Note that the second record has an empty 'contexts' list.\n",
    "evaluation_data = [\n",
    "    {\n",
    "        \"input\": \"What should I do in New York City in July?\",\n",
    "        \"output\": \"Check out Times Square, go to an outdoor concert, and visit the Statue of Liberty.\",\n",
    "        \"contexts\": [\n",
    "            \"Times Square is known for its Broadway theaters, bright lights, and bustling atmosphere.\",\n",
    "            \"Outdoor concerts in Central Park are popular summer events attracting many visitors.\",\n",
    "            \"The Statue of Liberty is a symbol of freedom and a must-see landmark in NYC.\"\n",
    "        ]\n",
    "    },\n",
    "    {\n",
    "        \"input\": \"Can you help me with my math homework?\",\n",
    "        \"output\": \"I'm designed to assist with travel queries. For math help, try using online resources like Khan Academy or Mathway.\",\n",
    "        \"contexts\": []\n",
    "    },\n",
    "    {\n",
    "        \"input\": \"What’s the capital of France?\",\n",
    "        \"output\": \"The capital of France is Paris.\",\n",
    "        \"contexts\": [\n",
    "            \"Paris, known as the City of Light, is the most populous city of France.\",\n",
    "            \"European capitals: Paris, France; Berlin, Germany; Madrid, Spain\",\n",
    "        ]\n",
    "    }\n",
    "]\n",
    "\n",
    "# Format our dataset for Ragas data structure\n",
    "def prepare_data_for_ragas(data_list):\n",
    "    data_table = {\n",
    "        'question': [],\n",
    "        'answer': [],\n",
    "        'contexts': []\n",
    "    }\n",
    "    for data_item in data_list:\n",
    "        data_table[\"question\"].append(data_item[\"input\"])\n",
    "        data_table[\"answer\"].append(data_item[\"output\"])\n",
    "        data_table[\"contexts\"].append(data_item[\"contexts\"])\n",
    "    \n",
    "    return data_table\n",
    "\n",
    "def create_report(data):\n",
    "    \"\"\"Score `data` with the Ragas answer_relevancy metric.\n",
    "\n",
    "    `data` is a list of records in the form accepted by prepare_data_for_ragas.\n",
    "    The records are converted to a Dataset and passed to ragas.evaluate together\n",
    "    with a gpt-4o-mini chat model and text-embedding-3-large embeddings, both\n",
    "    created with the module-level openai_api_key.\n",
    "\n",
    "    Returns whatever evaluate() returns for the answer_relevancy metric.\n",
    "    \"\"\"\n",
    "    # Build the column dict once and reuse it for the Dataset.\n",
    "    ragas_dict = prepare_data_for_ragas(data)\n",
    "    dataset = Dataset.from_dict(ragas_dict)\n",
    "\n",
    "    langchain_llm = ChatOpenAI(\n",
    "        model_name=\"gpt-4o-mini\",\n",
    "        api_key=openai_api_key,\n",
    "    )\n",
    "    langchain_embeddings = OpenAIEmbeddings(\n",
    "        model=\"text-embedding-3-large\",\n",
    "        api_key=openai_api_key,\n",
    "    )\n",
    "\n",
    "    score = evaluate(\n",
    "        dataset,\n",
    "        metrics=[answer_relevancy],\n",
    "        llm=langchain_llm,\n",
    "        embeddings=langchain_embeddings,\n",
    "    )\n",
    "    return score\n",
    "\n",
    "# Run the evaluation, show the result as a rich table, then print the result object itself\n",
    "results = create_report(evaluation_data)\n",
    "display(results.to_pandas())\n",
    "print(results)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
